pacman::p_load(tidyverse, data.table, broom, readxl,latex2exp, ggpubr, ggpmisc, 
               sf, raster, binsreg)
sf_use_s2(FALSE)
rm(list=ls())
`%!in%` = Negate(`%in%`)
################################################################################

#Spatial units

spatial_id_a = 'county_id'
spatial_id_b = 'amc_id'
spatial_id_agg = 'state_id'

df = fread(paste0('../../data/landuse/clean/geographicunits_argentina.csv.gz'))[, list(country, region, state_id, county_id)]
setnames(df, old=spatial_id_a, new=c('spatial_id'))
df = df[, list(spatial_id, region, state_id)]
df_a_u = unique(df, by = "spatial_id")
df = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(country, region, state_id, mesoregion_id, microregion_id, amc_id, county_id)]
setnames(df, old=spatial_id_b, new=c('spatial_id'))
df = df[, list(spatial_id, region, state_id)]
df_b_u = unique(df, by = "spatial_id")
df_cw = rbind(df_a_u,df_b_u)


##################### Land shares

for (country in c('argentina','brazil') ){
  
  df_l = fread(paste0('../../data/landuse/clean/landuse_',country,'_county.csv.gz'))
  df_u = fread(paste0('../../data/landuse/clean/geographicunits_',country,'.csv.gz'))

  if (country=='argentina'){
    
    #Spatial unit crosswalk
    df_u = df_u[, list(county_id, state_id, region, land_area_km2)]
    
    #Land
    df = merge(df_l, df_u, by='county_id')
    df = df[, c('forest','pasture') := list(forest_natural+forest_planted,pasture+forage_seasonal + forage_permanent)]
    setnames(df, old = c(spatial_id_a),  new = c('spatial_id'))
    df = df[,  list(year, region, state_id, spatial_id, pasture, forest,soybean, maize)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)][, unit:='hectares']
    
    setnames(df, old=c('soybean','maize','pasture'), new=c('soybean_ha','maize_ha','pasture_ha'))
    df[, c('agriculture_ha','nature_ha'):=list(soybean_ha+maize_ha+pasture_ha,forest)]
    df[, c('soybean','maize','pasture','share_agriculture'):=list(soybean_ha/agriculture_ha, maize_ha/agriculture_ha, pasture_ha/agriculture_ha, agriculture_ha/(agriculture_ha+nature_ha) )]
    df = df[,list(year, country, region, state_id, spatial_id, soybean, maize, pasture, share_agriculture)][is.finite(share_agriculture),]
    df_commodity = melt(df[,list(year, spatial_id, soybean, maize, pasture)], id.vars=c("year","spatial_id"), variable.name="commodity", value.name="share_commodity")
    df_nests = df[,list(year, country, region, state_id, spatial_id, share_agriculture)]
    df_land = merge(df_nests,df_commodity, by=c('year','spatial_id'))
    
    df_ar = df_land
    df_ar$country = 'ARGENTINA'
    
  }else{
    
    #Spatial unit crosswalk
    df_u = df_u[, list(county_id, amc_id, microregion_id, mesoregion_id, state_id, region, land_area_km2)]
    
    #Land
    df = merge(df_l, df_u, by='county_id')
    df = df[, c('pasture','forest') := list(pasture_natural+pasture_planted,forest_natural+forest_planted)]
    setnames(df, old = c(spatial_id_b),  new = c('spatial_id'))
    df = df[,  list(year, region, state_id, spatial_id, pasture, forest,soybean, maize)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)][, unit:='hectares']
    setnames(df, old=c('soybean','maize','pasture'), new=c('soybean_ha','maize_ha','pasture_ha'))
    df[, c('agriculture_ha','nature_ha'):=list(soybean_ha+maize_ha+pasture_ha,forest)]
    df[, c('soybean','maize','pasture','share_agriculture'):=list(soybean_ha/agriculture_ha, maize_ha/agriculture_ha, pasture_ha/agriculture_ha, agriculture_ha/(agriculture_ha+nature_ha) )]
    df = df[,list(year, country, region, state_id, spatial_id, soybean, maize, pasture, share_agriculture)][is.finite(share_agriculture),]
    df_commodity = melt(df[,list(year, spatial_id, soybean, maize, pasture)], id.vars=c("year","spatial_id"), variable.name="commodity", value.name="share_commodity")
    df_nests = df[,list(year, country, region, state_id, spatial_id, share_agriculture)]
    df_land = merge(df_nests,df_commodity, by=c('year','spatial_id'))

    df_br = df_land
    df_br$country = 'BRAZIL'
    
  }
}
df_land = rbind(df_ar,df_br)


##################### Output elasticities

df = fread('inputs/parameters_supply.csv')[,list(spatial_id, theta_lambda, theta, lambda, frontier)]
df_params = df[, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id)]
df = merge(df_land,df_params,  by=c('spatial_id'))
df_factors = setDT(read_excel('../../data/factors/raw/factor_shares.xlsx'))[,c('commodity','land_share')]
df = merge(df,df_factors, by=c('commodity'))
df[, c('elasticity_QP_i'):=list(  ( (theta_lambda-1)*(1-share_commodity) + (theta-1)*share_commodity*(1-share_agriculture)+(1-land_share) )/land_share ) ]
df[, c('elasticity_PQ_i'):=list( 1/elasticity_QP_i )]
df[commodity=='pasture',]$commodity = 'beef'
df_elasticity = df


##################### Market shares

id_type_var = c('county_known')
exporter_type = c('exporter_group') 
destination_type = 'destination_bloc'
year_max=2018

for (country in c('argentina','brazil') ){
  
  if (country=='argentina'){
    
    df = fread(paste0('../../data/agribusiness/clean/soybean_',country,'.csv.gz'))[id_type %in% id_type_var & state %!in% c('UNKNOWN','IMPORTS + STOCK') & destination_bloc != 'UNKNOWN',]
    setnames(df, old = c(spatial_id_a,exporter_type,destination_type),  new = c('spatial_id','firm','destination_unit'))
    
    df_n_ij = df[, list(year, spatial_id, destination_unit, commodity, firm)][ , .(N_ij = length(unique(firm))), by=list(year, spatial_id, destination_unit, commodity)]
    df_n_i = df[, list(year, spatial_id, commodity, firm)][ , .(N_i = length(unique(firm))), by=list(year, spatial_id, commodity)]
    df_n = merge(df_n_ij,df_n_i, by=c('year','spatial_id','commodity'), all.x=T)
    
    df_q_ij = df[, list(year, country, region, state_id, spatial_id, destination_unit, equivalent_tonnes, fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, destination_unit, commodity)]
    df_q_i = df[, list(year, country, region, state_id, spatial_id, equivalent_tonnes, fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, commodity)]
    df_s = merge(df_q_ij,df_q_i, by=c('year','country','region','state_id','spatial_id','commodity'))[, c('share_ij_tonnes','share_ij_usd','price_ij') := list(equivalent_tonnes.x/equivalent_tonnes.y,fob_usd.x/fob_usd.y,fob_usd.x/equivalent_tonnes.x)][, list(year, country, region, state_id, spatial_id, destination_unit, share_ij_tonnes, commodity)]
    
    df_a_t = merge(df_s,df_n, by=c('year','spatial_id','destination_unit','commodity'))
    df_a = df_a_t[year<=year_max,][, c("year") := NULL][, lapply(.SD, mean, na.rm=TRUE),  by=list(country, region, state_id, spatial_id, destination_unit, commodity)]
    
  }else{
    
    n_c=1
    for (commodity in c('soybean','maize','beef')) {
      
      if(commodity=='beef'){
        df = fread(paste0('../../data/agribusiness/clean/',commodity,'_',country,'.csv.gz'))[product_type %in% c('Fresh & frozen beef products','BEEF BONELESS') & id_type %in% id_type_var & state !='UNKNOWN STATE'  & destination != 'BRAZIL' & destination_bloc != 'UNKNOWN',]
        df$port = df$hub
      }else{
        df = fread(paste0('../../data/agribusiness/clean/',commodity,'_',country,'.csv.gz'))[id_type %in% id_type_var & state !='UNKNOWN STATE'  & destination != 'BRAZIL' & destination_bloc != 'UNKNOWN',]
      }
      setnames(df, old = c(spatial_id_b,exporter_type,destination_type),  new = c('spatial_id','firm','destination_unit'))
      
      df_n_ij = df[, list(year, spatial_id, destination_unit, commodity, firm)][ , .(N_ij = length(unique(firm))), by=list(year, spatial_id, destination_unit, commodity)]
      df_n_i = df[, list(year, spatial_id, commodity, firm)][ , .(N_i = length(unique(firm))), by=list(year, spatial_id, commodity)]
      df_n = merge(df_n_ij,df_n_i, by=c('year','spatial_id','commodity'), all.x=T)
      
      df_q_ij = df[, list(year, country, region, state_id, spatial_id, destination_unit, equivalent_tonnes, fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, destination_unit, commodity)]
      df_q_i = df[, list(year, country, region, state_id, spatial_id,equivalent_tonnes,fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, commodity)]
      df_s = merge(df_q_ij,df_q_i, by=c('year','country','region','state_id','spatial_id','commodity'))[, c('share_ij_tonnes','share_ij_usd','price_ij') := list(equivalent_tonnes.x/equivalent_tonnes.y,fob_usd.x/fob_usd.y,fob_usd.x/equivalent_tonnes.x)][, list(year, country, region, state_id, spatial_id, destination_unit, share_ij_tonnes, commodity)]
      
      df_b_t_c = merge(df_s,df_n, by=c('year','spatial_id','destination_unit','commodity'))
      df_b_c = df_b_t_c[year<=year_max,][, c("year") := NULL][, lapply(.SD, mean, na.rm=TRUE),  by=list(country, region, state_id, spatial_id, destination_unit, commodity)]
      
      if(n_c==1){
        df_b_t = df_b_t_c
        df_b = df_b_c
      }else{
        df_b_t = rbind(df_b_t,df_b_t_c)
        df_b = rbind(df_b,df_b_c)
      }
      n_c=n_c+1
    } 
  }
}

df = df_a_t[year==2018,]
df = merge(df, df_a_u, by=c('spatial_id','region','state_id'), all.y=T)
df$year=2018
df[is.na(N_i),]$N_i= quantile(df$N_i,0.025,na.rm=T)
df[is.na(N_ij),]$N_ij = quantile(df$N_ij,0.025,na.rm=T) 

df$commodity='soybean'
df$country='ARGENTINA'
df1 = df
df2 = df
df1$commodity='maize'
df2$commodity='beef'
df_a_t = rbind(df,df1,df2)

df_shares = rbind(df_a_t,df_b_t)


#####################  Markdowns

df = merge(df_shares, df_elasticity, by=c('commodity','year','country','region','state_id','spatial_id'))
df$mu_i = (1+df$elasticity_PQ_i/df$N_i)^(-1)
df[N_i==0,]$mu_i=NA
df$N_i = as.numeric(df$N_i)
df_final = df

df1 = df_final[year==2018 & country=='ARGENTINA',]
df2 = df_final[year==2017 & country=='BRAZIL',]
df=rbind(df1,df2)
df  = df[, list(year, commodity, country, region, spatial_id, N_i, share_commodity, share_agriculture, elasticity_QP_i, mu_i)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, commodity, country, region, spatial_id)]
df_i = melt(df, id.vars=c("year","commodity","country","region","spatial_id"), variable.name="value_type", value.name="value")[!is.na(value),]

##### Correlation with distance to hub/port

#Crosswalk to aggregate spatial units
df = fread(paste0('../../data/landuse/clean/geographicunits_argentina.csv.gz'))[, list(county_id)]
df$spatial_id = df$county_id
df_a_county = df
df = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(country, region, state_id, mesoregion_id, microregion_id, amc_id, county_id)]
df$spatial_id = df$amc_id
df_b_county = df[, list(county_id, spatial_id)]
df_u_county=rbind(df_a_county,df_b_county)

#Market access measures
df = fread(paste0('../../data/agribusiness/clean/distance_to_hub_all.csv.gz'))
df_ma = merge(df,df_u_county,by='county_id')[, list(spatial_id, ma, commodity, ma_type, product_type, size_q_i)]

#Scatterplot
ma_type_plot = "all_hubs_weighted"
df = df_ma[ma_type==ma_type_plot,][,list(commodity,spatial_id,ma,size_q_i)][, lapply(.SD, mean), by=.(commodity,spatial_id)]
df=merge(df_i[value_type=='mu_i',], df, by=c('commodity','spatial_id'))
df$y_var=log(df$value)
df$x_var=log(df$ma)
df$size_var= df$size_q_i

for(commodity_var in c('beef','soybean','maize')){

  df_scatter = df[commodity==commodity_var,]
  x_max = quantile(df_scatter$x_var, 0.975, na.rm=T)
  x_min = quantile(df_scatter$x_var, 0.025, na.rm=T)
  y_max = quantile(df_scatter$y_var, 0.975, na.rm=T)
  y_min = quantile(df_scatter$y_var, 0.025, na.rm=T)
  df_scatter_rm_outliers = df_scatter[x_var<x_max & x_var>x_min,][y_var<y_max & y_var>y_min,]
  
  fit_formula <- y ~ x
  fig = ggplot(data = df_scatter_rm_outliers, aes(x = x_var, y=y_var) ) +
    stat_smooth(method = "lm", formula = y ~ x + I(x^2), aes(weight=size_var), color='black', linewidth = 1, se=F)+
    theme_minimal() +
    labs(x='log(market access)', y=TeX('log(markdown \\mu)'), title = paste0('Upstream ',commodity_var,' markets' ) )+
    theme(axis.title.x = element_text(hjust = 1, vjust=-1, size=9),
          axis.title.y = element_text(hjust = 0.95,  vjust=2, size=9),
          axis.text.x = element_text(size=8),
          axis.text.y = element_text(size=8),
          plot.title = element_text(size=10),
          aspect.ratio = 1,
          panel.grid.minor = element_blank())+
    stat_summary_bin(fun='mean', bins=20, color='darkred', size=2, geom='point')+
    guides(size = "none")
  
  fig_formatted = ggplot(data = df_scatter_rm_outliers, aes(x = x_var, y=y_var) ) +
    stat_smooth(method = "lm", formula = y ~ x + I(x^2), aes(weight=size_var), color='black', linewidth = 1, se=F)+
    theme_minimal() +
    labs(x='log(market access)', y=TeX('log(markdown \\mu)'), title='B. Corr. w/market access')+
    theme(axis.title.x = element_text(hjust = 1, vjust=-1, size=8),
          axis.title.y = element_text(hjust = 0.9, vjust=0, size=8),
          axis.text.x = element_text(size=7),
          axis.text.y = element_text(size=7),
          plot.title = element_text(size=9, hjust=2),
          aspect.ratio = 1.25,
          panel.grid.minor = element_blank())+
    stat_summary_bin(fun='mean', bins=20, color='black', size=1, geom='point')+
    guides(size = "none")

  if(commodity_var=='beef'){fig_b=fig; df_scatter_b=df_scatter }
  if(commodity_var=='soybean'){fig_s=fig; df_scatter_s=df_scatter }
  if(commodity_var=='maize'){fig_m=fig; df_scatter_m=df_scatter }
  
  if(commodity_var=='beef'){figure3b=fig_formatted}

}

fig_scatter = ggarrange(fig_b,fig_s,fig_m,nrow=1,ncol=3)
ggsave(paste0("../../output/figures/appendix/figure4a_corrma.pdf"), width = 9, height = 3)
ggsave(paste0("../../output/figures/appendix/figure4a_corrma.eps"), width = 9, height = 3)




##### Markdown maps

#Shapefiles
shpfile = sf::read_sf("../../data/shapefiles/clean/argentinabrazil_all.shp")
shpfile <- st_crop(shpfile, extent(-85,-33, -65, 11.75)) 
names(shpfile) <- c("country","state","state_id","county","county_id","region_id","region_id","geometry")
shpfile <- shpfile[,(names(shpfile) %in% c(spatial_id_agg,"geometry"))]
setnames(shpfile, old=spatial_id_agg, new='spatial_id')
shpfile <- shpfile %>% dplyr::group_by(spatial_id) %>% dplyr::summarise()
shpfile <- st_simplify(shpfile, dTolerance = 0.1, preserveTopology = T)

shpfile_ctry = sf::read_sf("../../data/shapefiles/clean/restofsouthamerica_country.shp")
shpfile_ctry <- shpfile_ctry[,(names(shpfile_ctry) %in% c("ctry","geometry"))]
shpfile_ctry <- st_simplify(shpfile_ctry, dTolerance = 0.1, preserveTopology = T)

#Data
commodity_map = 'beef'
plot_variable = 'mu_i'
df_full = df_i[value_type==plot_variable & commodity==commodity_map,]
df = merge(df_full, df_cw, by="spatial_id")
df = df[,-c('spatial_id')]
setnames(df, old=c(spatial_id_agg), new=c('spatial_id'))
df = df[,list(spatial_id, value)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id)]
df = merge(shpfile, df, by="spatial_id", all.y=T)

figure3a = ggplot() +
  geom_sf(data=shpfile, fill=NA, color = 'gray', linewidth = 0.05) +
  geom_sf(data=shpfile_ctry, fill=NA, color = 'black', linewidth = 0.05) +
  geom_sf(data=df, aes(fill=as.vector(value), color=after_scale(fill)), lwd = 0.05)+
  scale_fill_viridis_c(option='inferno', direction=-1) +
  theme_void() +
  labs(fill=TeX('\\mu'), title=TeX('A. Markdown (\\mu) distribution'))+
  theme(legend.position = "right",  legend.direction = "vertical", 
        legend.text = element_text(size=7),
        legend.key.size = unit(0.7, "cm"),
        legend.key.width = unit(0.2,"cm"),
        legend.title = element_text(size=9),
        plot.title =  element_text(size=9, hjust=0))

figure3a_bw = ggplot() +
  geom_sf(data=shpfile, fill=NA, color = 'gray', linewidth = 0.05) +
  geom_sf(data=shpfile_ctry, fill=NA, color = 'black', linewidth = 0.05) +
  geom_sf(data=df, aes(fill=as.vector(value), color=after_scale(fill)), lwd = 0.05)+
  scale_fill_fermenter(palette = "Greys",direction=1) +
  theme_void() +
  labs(fill=TeX('\\mu'), title=TeX('A. Markdown (\\mu) distribution'))+
  theme(legend.position = "right",  legend.direction = "vertical", 
        legend.text = element_text(size=7),
        legend.key.size = unit(0.7, "cm"),
        legend.key.width = unit(0.15,"cm"),
        legend.title = element_text(size=9),
        plot.title =  element_text(size=9, hjust=0))




